import csv
import datetime as dt
import inspect
import os
import re
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from pandas.plotting import table
from plotly.subplots import make_subplots
from scipy import stats
from scipy.stats import pearsonr
from sklearn.preprocessing import RobustScaler

import composite as ci

# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
from IPython.display import display, HTML

plotly.offline.init_notebook_mode()
_mathjax_tag = '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
display(HTML(_mathjax_tag))
# For plotly figures to show on printed version
plotly.offline.init_notebook_mode()
Code
# show all output from cells
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"  # last_expr

# show all columns
pd.set_option("display.max_columns", None)
Code
# Create the output folder if it does not exist yet.
# exist_ok=True avoids the check-then-create race of the original
# `if not os.path.exists(...)` pattern and is a no-op when present.
os.makedirs("../output", exist_ok=True)
Country names dict
Code
# load countries dictionary (CSV: country name in column 0, abbreviation in column 1)
country_to_abbrev = (
    pd.read_csv("../data/countryAbbr.csv", header=None, index_col=0).squeeze().to_dict()
)
# invert the dictionary
abbrev_to_country = dict(map(reversed, country_to_abbrev.items()))
# add additional abbreviations used by Eurostat
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"

allEurope = pd.read_csv("../data/Countries-Europe.csv")
allEurope = allEurope.name.to_list()

# fmt: off
# countries included in the assessment
countries = ['Belgium','Germany','Denmark','Estonia','Spain','Finland','France','Ireland','Lithuania','Latvia','Netherlands','Poland','Portugal','Sweden','United Kingdom']
# countries used to calculate some variable targets (EU + UK)
countriesEU = ["Belgium", "Bulgaria", "Cyprus", "Greece", "Germany", "Croatia","Italy", "Denmark", "Estonia", "Spain", "Finland", "France","Ireland", "Lithuania", "Latvia", "Malta", "Netherlands", "Poland","Portugal", "Romania", "Sweden", "United Kingdom",]
# abbreviation for each assessed country, falling back to the name itself
countryAbb = [country_to_abbrev.get(x, x) for x in countries]
# fmt: on
Define general functions
Code
def missingCountries(df, countries=None):
    """Print which of the assessed countries are absent from df's index.

    df: dataframe indexed by country name
    countries: list of country names to check; defaults to the notebook-level
        ``countries`` list, resolved at call time (the original froze the list
        at definition time via a ``countries=countries`` default)
    """
    if countries is None:
        # late-bind the notebook-level list so later edits to it are seen
        countries = globals()["countries"]
    missing = [c for c in countries if c not in df.index.unique()]
    if len(missing) > 0:
        print("Missing countries:\n", "\n".join(missing))
    else:
        print("No missing countries")


def negativeValues(ds):
    """Replace negative values in ds with 0, in place, and report if any were found.

    ds: pandas Series (or DataFrame) of observed values
    """
    if ds[ds < 0].count().sum() > 0:
        # replace negative values with 0
        ds[ds < 0] = 0
        print("Negative values in dataset replaced with 0")


def calculate_target(df, valueCol, countryCol):
    """
    Calculate target (max/min) for each country based on max/min of 3 top
    countries, after dropping outliers using the Interquartile Range (IQR)
    proximity rule.

    df: dataframe
    valueCol: column name of value
    countryCol: column name of country
    Returns (avgMax3, avgMin3).
    """
    # we drop outliers using Interquartile Range (IQR) proximity rule
    quartile_1, quartile_3 = np.percentile(df[valueCol], [25, 75])
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - 1.5 * iqr
    upper_bound = quartile_3 + 1.5 * iqr
    # BUGFIX: the original mask `~(v > upper) | (v < lower)` parsed as
    # `(~A) | B` because `~` binds tighter than `|`, so low outliers were
    # kept (and skewed avgMin3). Parenthesize so both tails are dropped.
    is_outlier = (df[valueCol] > upper_bound) | (df[valueCol] < lower_bound)
    dfNoOutliers = df[~is_outlier]
    # target is max/min of 3 top countries after calculating max/min for each country
    avgMax3 = (
        dfNoOutliers[valueCol]
        .groupby(dfNoOutliers[countryCol])
        .max()
        .nlargest(3)
        .mean()
    )
    avgMin3 = (
        dfNoOutliers[valueCol]
        .groupby(dfNoOutliers[countryCol])
        .min()
        .nsmallest(3)
        .mean()
    )
    return avgMax3, avgMin3
Beach litter originating from national land-based sources that ends in the beach (%)
PERCENT
900
EN_MAR_BEALIT_BV
Beach litter originating from national land-based sources that ends in the beach (Tonnes)
TONNES
900
EN_MAR_BEALIT_EXP
Exported beach litter originating from national land-based sources (Tonnes)
TONNES
900
EN_MAR_BEALIT_OP
Beach litter originating from national land-based sources that ends in the ocean (%)
PERCENT
900
EN_MAR_BEALIT_OV
Beach litter originating from national land-based sources that ends in the ocean (Tonnes)
TONNES
900
EN_MAR_CHLANM
Chlorophyll-a anomaly, remote sensing (%)
PERCENT
2784
EN_MAR_CHLDEV
Chlorophyll-a deviations, remote sensing (%)
PERCENT
4131
EN_MAR_PLASDD
Floating plastic debris density (count per km2)
NUMBER
3
14.2.1
EN_SCP_ECSYBA
Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO)
NUMBER
33
14.3.1
ER_OAW_MNACD
Average marine acidity (pH) measured at agreed suite of representative sampling stations
PH
903
14.4.1
ER_H2O_FWTL
Proportion of fish stocks within biologically sustainable levels (not overexploited) (%)
PERCENT
422
14.5.1
ER_MRN_MPA
Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%)
PERCENT
6049
14.6.1
ER_REG_UNFCIM
Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest)
NUMBER
882
14.7.1
EN_SCP_FSHGDP
Sustainable fisheries as a proportion of GDP
PERCENT
5880
14.a.1
ER_RDE_OSEX
National ocean science expenditure as a share of total research and development funding (%)
PERCENT
159
14.b.1
ER_REG_SSFRAR
Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest)
NUMBER
882
14.c.1
ER_UNCLOS_IMPLE
Score for the implementation of UNCLOS and its two implementing agreements (%)
PERCENT
63
ER_UNCLOS_RATACC
Score for the ratification of and accession to UNCLOS and its two implementing agreements (%)
PERCENT
63
Check official data by indicator
Code
# check sdg indicator, change SeriesCode to the one you want to check
# Example with Average marine acidity (pH) ER_OAW_MNACD
# Normalise the long official "United Kingdom of ..." label to one name
ukMask = sdg14["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)")
sdg14.loc[ukMask, "GeoAreaName"] = "United Kingdom"
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countriesEU)]
sdg14check = sdg14check[sdg14check["SeriesCode"] == "EN_MAR_CHLDEV"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# mr = sdg14check.columns[-3:].to_list()
sdg14check
missingCountries(sdg14check)
TimePeriod
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
GeoAreaName
Belgium
1.21206
1.99381
3.47490
11.46279
19.51164
4.08128
1.83826
1.58103
2.37154
1.18214
1.99841
2.43993
1.49730
7.14286
0.94334
NaN
0.39370
0.00000
Bulgaria
4.16893
4.93915
1.72057
9.42693
5.59415
8.67561
1.28027
0.48363
3.09397
1.86093
1.97014
0.36347
0.40892
2.40777
9.28366
2.18754
6.87510
11.11479
Croatia
0.58760
1.98793
0.24971
1.70606
3.22913
3.29982
2.28946
0.38562
2.68934
0.78792
0.51874
0.25081
0.13652
0.87036
0.19819
0.85328
0.46349
0.04665
Cyprus
0.03237
0.67684
5.43525
0.03372
0.04197
0.08737
0.14988
0.07214
0.04735
0.03834
0.04330
0.03607
0.05049
0.12025
0.22297
0.20735
1.48557
0.01967
Denmark
1.86536
2.54946
5.67924
6.44745
3.48344
2.28091
6.44146
3.92073
2.97247
2.79938
6.30996
3.06505
3.57844
1.11015
3.56811
3.87021
2.58100
2.78762
Estonia
1.51921
0.56911
0.51643
0.79730
0.48784
0.64761
0.76199
3.81015
2.30709
1.24961
1.02607
1.16590
1.72836
1.19478
1.43434
1.11537
0.90704
1.01505
Finland
1.51286
0.90536
1.43337
1.63576
0.71646
0.73382
0.39653
6.19252
2.25493
2.44967
2.27454
3.63565
3.19460
2.53959
8.19352
8.50139
2.04491
2.66017
France
3.07679
2.19209
3.20013
2.88260
2.87727
2.34783
4.55513
2.42255
2.44191
6.42791
2.78822
1.35719
1.18126
4.23569
3.37304
2.20585
3.41925
3.67835
Germany
2.82582
2.29924
4.34811
3.19636
2.80721
1.23534
1.78236
2.40504
4.05483
1.62128
3.10226
1.90806
1.66571
1.50827
1.09150
1.38429
2.37366
1.57073
Greece
0.44651
1.55735
1.92005
0.40773
0.46729
0.94987
0.61986
0.41117
0.28912
0.35359
0.98422
0.12179
0.71516
0.62026
0.91862
0.39424
6.19418
0.39594
Ireland
6.20969
1.43425
5.61287
4.22741
10.55514
4.49554
6.26931
11.47734
6.84682
11.75609
10.64502
8.28608
6.95680
5.63686
7.96780
9.16862
7.80383
7.19120
Italy
3.88053
2.26854
0.60136
1.26326
2.48420
2.78981
1.67843
0.45131
4.69379
1.71756
1.21291
0.25095
0.15158
1.58332
0.32512
0.44616
1.74440
0.13029
Latvia
2.64657
1.30798
0.92408
13.45978
1.15164
0.35619
0.37670
3.05887
1.33013
3.31377
0.71945
2.45417
1.54576
1.35641
3.26441
3.45741
2.93909
1.97917
Lithuania
3.65776
0.60938
0.94266
22.07638
0.55006
3.25111
0.69590
2.31445
5.50424
0.38390
1.68178
2.45627
2.09216
1.07521
1.02619
1.03005
1.80514
0.52029
Malta
0.69559
0.06631
0.11593
0.14906
0.88329
0.21862
4.55449
0.44760
7.12865
0.30708
0.07464
0.03316
0.06631
0.05524
0.06631
NaN
1.02751
0.08134
Netherlands
3.88022
2.57193
4.50062
3.98039
1.01879
1.30626
1.92674
2.86910
5.50433
1.59554
2.48669
1.53543
0.87952
4.29357
1.48112
1.45710
3.78674
2.05048
Poland
1.60636
0.68574
2.02166
20.72778
0.80437
1.28653
0.62665
3.66052
3.24043
1.02287
0.74016
0.61523
0.82182
2.76214
0.58306
0.52433
0.48481
0.12788
Portugal
1.93264
0.67070
0.56195
0.49887
1.96548
0.91179
1.35264
0.73107
0.69663
3.48167
3.74581
1.79719
0.76615
1.57851
1.03354
0.71543
1.22786
0.60699
Romania
14.18754
4.97330
10.78649
7.82456
5.53885
4.59747
1.29897
1.56909
4.84162
4.72854
1.71971
3.68898
2.81691
2.33240
3.16990
1.05372
2.18078
7.87626
Spain
5.47737
3.40115
3.28590
1.40944
3.88856
2.45069
3.05636
4.68771
4.52716
4.58491
7.69076
2.19563
1.17747
4.02850
1.29418
2.12530
3.81443
2.34757
Sweden
2.41189
1.56855
1.61501
8.67041
1.10243
1.15580
1.16412
4.44138
1.50365
1.54067
2.80039
1.68885
2.13639
1.81981
4.88832
4.00724
1.63658
1.17290
United Kingdom
3.59549
3.05729
4.39432
2.51119
4.97541
5.86776
6.38017
4.27086
5.18281
6.11360
3.48299
3.13726
2.39811
3.84653
3.56834
4.83567
4.46973
7.33246
No missing countries
14.1
(a) Index of coastal eutrophication
We use: 1. Gross Nutrient Balance. Target defined by top 3 countries 2. Marine waters affected by eutrophication. Target defined by top 3 countries
# Gross nutrient balance (BAL_UAA, kg/ha), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/AEI_PR_GNB__custom_153613/
nutrient = pd.read_csv("../data/aei_pr_gnb__custom_7309259_linear.csv")
nutrient["geo"] = nutrient["geo"].map(abbrev_to_country).fillna(nutrient["geo"])
years = range(2012, nutrient.TIME_PERIOD.max() + 1)
# We use all countries in the EU to calculate the target value.
keep = nutrient["geo"].isin(countriesEU) & nutrient["TIME_PERIOD"].isin(years)
nutrient = nutrient[keep]
# Countries can have nutrient deficiency (negative value) indicating declining soil fertility.
negativeValues(nutrient.OBS_VALUE)

nitro = nutrient[nutrient["nutrient"] == "N"].copy()
phospho = nutrient[nutrient["nutrient"] == "P"].copy()


# For eutrophication, the less nutrient balance the better.
def nutrientTarget(df):
    """Score each country/year against the top-3 (lowest) nutrient balance target."""
    # we replace 0 with nan to calculate a meaningful target
    target = min(calculate_target(df.replace(0, np.nan), "OBS_VALUE", "geo"))
    df["target"] = (
        (target / df.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
    )
    return df.pivot_table(
        columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
    )


nitro = nutrientTarget(nitro)
phospho = nutrientTarget(phospho)

years = [2012, 2016, nutrient.TIME_PERIOD.max()]
for label, scores in {"nitro": nitro, "phospho": phospho}.items():
    print("Indicator values for ", label)
    scores[years][scores.index.isin(countries)]
    missingCountries(scores)
Negative values in dataset replaced with 0
Indicator values for nitro
No missing countries
Indicator values for phospho
No missing countries
# ph Calculations in pHCountryLevel.ipynb file
phCopernicus = pd.read_csv("../dataTemp/phAcidity/phCopernicus.csv", index_col=0)
phCopernicus
# We set target pH as pre-industrial level https://www.eea.europa.eu/ims/ocean-acidification
# pH is measured in logarithmic scale, we need to convert to H+ concentration
# for meaningful comparison of pH value changes
target = 10 ** -8.2
phCopernicus = 10 ** -phCopernicus
# the lower the pH, the more H+ concentration
phCopernicus = target / phCopernicus * 100
missingCountries(phCopernicus)
2012
2016
2021
Belgium
8.070179
8.066542
8.063813
Germany
8.055145
8.054312
8.044487
Denmark
8.065767
8.064152
8.055584
Estonia
8.004305
8.038195
8.004012
Spain
8.082115
8.072526
8.066342
Finland
8.016771
8.034461
8.012807
France
8.087642
8.077980
8.070770
Ireland
8.096759
8.086586
8.077492
Lithuania
7.952361
7.988199
7.960740
Latvia
7.961943
8.010791
7.970287
Netherlands
8.084946
8.074646
8.068349
Poland
7.910485
7.958628
7.917143
Portugal
8.083828
8.076773
8.067000
Sweden
7.989894
8.017664
7.994139
United Kingdom
8.091344
8.082541
8.073730
No missing countries
2. Carbon emissions per capita
Several sources (see code)
Code
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/
pop = pd.read_csv("../data/tps00001.csv")
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
pop = pop[pop["geo"].isin(countriesEU)]
pop.rename(columns={"OBS_VALUE": "population"}, inplace=True)
Code
# CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/
co2 = pd.read_csv("../data/env_air_gge.csv")
co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
co2 = co2[
    co2["geo"].isin(countriesEU)
    & (co2.airpol == "CO2")
    & (co2.src_crf == "TOTX4_MEMONIA")
]

# Data to include UK. Data for other countries is almost the same as in the previous dataset
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year
# (1MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/
co2GCB = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx",
    sheet_name=1,
    skiprows=11,
    index_col=0,
)
# convert from carbon to co2 and from Mt to thousand tonnes
co2GCB = co2GCB * 3.664 * 1_000
co2GCB_UK = (
    pd.DataFrame(co2GCB["United Kingdom"])
    .reset_index()
    .rename(columns={"index": "TIME_PERIOD", "United Kingdom": "OBS_VALUE"})
)
co2GCB_UK["geo"] = "United Kingdom"
co2 = pd.concat([co2, co2GCB_UK], ignore_index=True)

# thousand tonnes co2 to co2 per capita
co2 = pd.merge(co2, pop, on=["geo", "TIME_PERIOD"], how="left")
co2["co2pc"] = co2["OBS_VALUE"] * 1_000 / co2["population"]
years = range(2012, co2.TIME_PERIOD.max() + 1)
co2 = co2[co2.TIME_PERIOD.isin(years)]

# The less co2pc, the better.
target = min(calculate_target(co2, "co2pc", "geo"))
co2["target"] = (target / co2.co2pc * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
years = [2012, 2016, co2.TIME_PERIOD.max()]
co2 = co2.pivot_table(columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean")
co2[years][co2.index.isin(countries)]
missingCountries(co2)
TIME_PERIOD
2012
2016
2021
geo
Belgium
37.734121
39.459259
41.833681
Denmark
47.376240
51.718478
68.103705
Estonia
26.783744
27.085479
45.744259
Finland
36.906728
40.406167
51.708163
France
61.806654
66.847742
75.572829
Germany
34.783657
35.997628
43.288290
Ireland
41.662118
39.922992
46.737265
Latvia
93.965364
93.972191
91.681403
Lithuania
76.334698
77.951383
72.164336
Netherlands
34.459622
34.839212
42.925952
Poland
42.102450
42.241700
41.185795
Portugal
72.420665
69.650043
88.751684
Spain
58.295610
61.088196
72.095049
Sweden
70.390420
77.794760
95.307931
United Kingdom
47.256895
59.384996
NaN
No missing countries
14.4
Proportion of fish stocks within biologically sustainable levels
# EEZ area
# Data: https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# https://www.europarl.europa.eu/factsheets/en/sheet/100/outermost-regions-ors-
outermost = [
    # Portugal
    "Azores",
    "Madeira",
    # Spain
    "Canary Islands",
    # we could include outermost regions of France,
    # but we care about EEZs in the Atlantic, Baltic and North Sea
]
eez = pd.read_csv(
    "../data/EMODnet_EEZ_v11_20210506.csv",
    delimiter=";",
    encoding="latin-1",
)
eez = eez[eez["Territory"].isin(outermost + countries)]
# Collapse sub-territories (e.g. "France (Atlantic)") onto the parent country
for country in ["France", "Portugal", "Spain"]:
    eez.loc[eez["Country"].str.contains(country), "Country"] = country
# NOTE(review): errors="ignore" is deprecated in recent pandas; kept for behavior parity
eez = eez.apply(pd.to_numeric, errors="ignore")
eez = eez[["Country", "Area_km2"]].groupby(["Country"]).sum(numeric_only=True)
eez = eez[eez.index.isin(countries)]
eez
# selection of subsidies by level of risk to fishery sustainability
# as per https://stats.oecd.org/Index.aspx?QueryId=121718
highRisk = [
    "I.E.1. Fuel tax concessions",
    "I.A.1. Transfers based on variable input use",
    "I.A.2.1.Support to vessel construction/purchase",
    "I.A.2.2.Support to modernisation",
    "I.A.2.3.Support to other fixed costs",
    "II.A. Access to other countries’ waters",
    "I.B.2. Special insurance system for fishers",
]
moderateRisk = [
    "I.B.1. Income support",
    "I.C. Transfers based on the reduction of productive capacity",
    "II.B.1. Capital expenditures",
    "II.B.2. Subsidized access to infrastructure",
]
uncertainRisk = [
    "I.D. Miscellaneous direct support to individuals and companies",
    "I.E.2. Other tax exemptions",
    "II.D. Support to fishing communities",
    "II.C. Marketing and promotion",
    "II.E. Education and training",
    "II.F. Research and development",
    "II.H. Miscellaneous support for services to the sector",
]
noRisk = [
    "II.G.1. Management expenditures",
    "II.G.2. Stock enhancement programs",
    "II.G.3. Enforcement expenditures",
]
Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE
# selection as per https://stats.oecd.org/Index.aspx?QueryId=121718
fse = pd.read_csv("../data/FISH_FSE.csv")
years = range(2012, fse.Year.max())
# we use US dollars because some countries use their own currency
# ('Danish Krone', 'Zloty', 'Swedish Krona')
fse = fse[
    (fse["Country"].isin(countriesEU))
    & (fse["Measure"].isin(["US dollar"]))
    & (fse["Year"].isin(years))
]

# strip variable codes and delete parent variables to avoid double counting
# solution from https://stackoverflow.com/q/76183612/14534411
separator = "."
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)
variableAll = fse.vCode.unique().tolist()


def is_parent(p, target):
    # p is an ancestor of target iff target extends p with a "." segment
    return p.startswith(target) and len(p) > len(target) and p[len(target)] == "."


# keep only leaf codes: a sorted code is a leaf when its successor does not extend it
vSupport = []
prev = ""
for s in sorted(variableAll) + [""]:
    if prev and not is_parent(s, prev):
        vSupport.append(prev)
    prev = s
fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# We calculate the ratio of high-risk subsidies to total subsidies
fseRisk = fse[fse.Variable.isin(highRisk)]
fseRisk = fseRisk.groupby(["Country", "Year"]).sum(numeric_only=True)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)
fseRiskRatio = (fseRisk / fse * 100).reset_index()

# The less high-risk subsidies the better. We ignore 0 values to calculate a meaningful target
target = min(
    calculate_target(fseRiskRatio[fseRiskRatio.Value != 0], "Value", "Country")
)
fseRiskRatio["target"] = (
    (target / fseRiskRatio.Value * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, fseRiskRatio.Year.max()]
fseRiskPivot = fseRiskRatio.pivot_table(
    columns="Year", index="Country", values="target", aggfunc="mean"
)
fseRiskPivot[years][fseRiskPivot.index.isin(countries)]
missingCountries(fseRiskPivot)
# IUU Fishing Index indicators retained for the composite score
selectedIUU = [
    "Accepted FAO Compliance Agreement",
    "Mandatory vessel tracking for commercial seagoing fleet",
    "Operate a national VMS/FMC centre",
    "Designated ports specified for entry by foreign vessels",
    "Ratification/accession of UNCLOS Convention",
    "Ratification/accession of UNFSA",
    "Have a NPOA-IUU ",
    "Compliance with RFMO flag state obligations",
    "Compliance with RFMO port state obligations",
]
Code
iuuIndex = pd.read_csv(
    "../data/iuu_fishing_index_2021-data_and_disclaimer/iuu_fishing_index_2019-2021_indicator_scores.csv"
)
iuuIndex = iuuIndex[
    (iuuIndex["Country"].isin(countries))
    & (iuuIndex["Indicator name"].isin(selectedIUU))
]
# fill missing scores with the per-year, per-indicator mean
iuuIndex["Score"] = iuuIndex.groupby(
    ["Year", "Indicator name"], sort=False, group_keys=False
)["Score"].apply(lambda x: x.fillna(x.mean()))
iuuIndex = iuuIndex.pivot_table(
    index=["Country", "Year"], columns="Indicator name", values="Score"
)
# equal weights across indicators; sigma controls the composite's substitutability
alpha = 1 / len(iuuIndex.columns)
sigma = 10
compositeIUU = ci.compositeDF(alpha, iuuIndex, sigma)
compositeIUU = pd.DataFrame(compositeIUU, columns=["IUU"])
# The target is the best in the IIU Fishing Index = 1
compositeIUU["target"] = 1 / compositeIUU.IUU * 100
compositeIUU = compositeIUU.reset_index().pivot_table(
    index="Country", columns="Year", values="target"
)
compositeIUU
missingCountries(compositeIUU)
# Livelihoods - Quality
# Hours Worked https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
# Select Insight Advisor, search in Asset for the measure and select Dimensions: year, member state
# hoursWorked = pd.read_excel('../data/blueEconomyObservatory/hoursWorked.xlsx')
# hoursWorked.rename(columns={'Member State':'country_name','Year':'year','Hours worked by employees':'hoursWorked'}, inplace=True)
# gvaHoursW = hoursWorked.merge(gvaEcon.reset_index(), on=['country_name','year'], how='left')
# gvaHoursW['gvaHoursW'] = gvaHoursW['value'] / gvaHoursW['hoursWorked'] * 1_000_000
# gvaHoursW = gvaHoursW[['country_name','year','gvaHoursW']].set_index(['country_name','year'])
# gvaHoursW.pivot_table(index='country_name', columns='year', values='gvaHoursW')
# Above, I calculated the GVA per hour worked ratio using the raw data of each variable.
# The result is different from the ratio variable provided by the Blue Economy Observatory.
# I emailed them to ask for clarification.
gvaHoursW = pd.read_excel(
    "../data/blueEconomyObservatory/GVAperHourWorked.xlsx"
).set_index(["Year", "Member State"])
gvaHoursW.rename(
    columns={"Gross value added per hour worked by employees": "gvaHoursW"},
    inplace=True,
)
gvaHoursW["gvaHoursW"] = gvaHoursW["gvaHoursW"].apply(pd.to_numeric, errors="coerce")

# Comparative price levels https://ec.europa.eu/eurostat/databrowser/view/TEC00120/default/table?lang=en
pppEurostat = pd.read_csv("../data/pppEurostat.csv")
pppEurostat.geo = pppEurostat.geo.replace(abbrev_to_country)
pppEurostat = (
    pppEurostat[pppEurostat.geo.isin(countriesEU)]
    .rename(columns={"geo": "Member State", "TIME_PERIOD": "Year", "OBS_VALUE": "ppp"})[
        ["ppp", "Member State", "Year"]
    ]
    .set_index(["Member State", "Year"])
)

# adjust by purchasing power parity (merge aligns the MultiIndex levels by name)
gvaHoursW = gvaHoursW.merge(pppEurostat, left_index=True, right_index=True)
gvaHoursW["gvaHoursWppp"] = gvaHoursW["gvaHoursW"] / gvaHoursW["ppp"] * 100
gvaHoursW = gvaHoursW[
    gvaHoursW.index.get_level_values("Year").isin(range(2012, 2023))
].reset_index()

# We set target using top 3 countries. The more gva/hours worked the better.
target = max(calculate_target(gvaHoursW, "gvaHoursWppp", "Member State"))
gvaHoursW["target"] = (
    (gvaHoursW.gvaHoursWppp / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, gvaHoursW.Year.max()]
gvaHoursW = gvaHoursW.pivot_table(index="Member State", columns="Year", values="target")
gvaHoursW[years][gvaHoursW.index.isin(countries)]
missingCountries(gvaHoursW)
Year
2012
2016
2020
Member State
Belgium
73.226756
70.153897
79.084103
Estonia
13.004557
13.749318
18.786201
Finland
26.215711
27.738387
27.423564
Germany
36.439759
42.221763
48.441349
Ireland
23.178746
24.868589
22.363956
Latvia
7.777087
8.912797
13.547465
Lithuania
9.675183
15.099927
19.144132
Netherlands
100.000000
64.869507
46.389875
Poland
19.237395
20.772320
27.250130
Portugal
14.911299
16.776657
17.068778
Spain
25.501543
23.418572
26.326202
Sweden
30.692363
34.465893
36.985960
United Kingdom
86.269042
52.194932
67.755189
Missing countries:
Denmark
France
Code
# Livelihoods - Quantity
# FTE employees https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
fteEmployment = pd.read_excel("../data/blueEconomyObservatory/employmentFTE.xlsx")
fteEmployment.rename(
    columns={
        "Member State": "country_name",
        "Year": "year",
        "Employees in full time equivalent units": "fteEmployees",
    },
    inplace=True,
)

# Coastal areas https://ec.europa.eu/eurostat/web/coastal-island-outermost-regions/methodology
coastArea = pd.read_excel("../data/NUTS2021Coastal.xlsx", sheet_name="Coastal regions")[
    ["NUTS_ID", "COASTAL CATEGORY"]
]
coastArea.rename(
    columns={"NUTS_ID": "geo", "COASTAL CATEGORY": "coastal"}, inplace=True
)

# Population https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_PJANAGGR3__custom_7060174/default/table?lang=en
population = pd.read_csv("../data/demoNUTS3.csv")
coastalPop = coastArea.merge(population, on="geo", how="left")
# France and UK have three letter abbreviation codes; take the first two letters
coastalPop["country"] = coastalPop.geo.str.extract(r"([a-zA-Z]{2})").replace(
    abbrev_to_country
)
coastalPop = coastalPop[coastalPop.country.isin(countriesEU)]
coastalPop = coastalPop.dropna(subset=["TIME_PERIOD"])
coastalPop["TIME_PERIOD"] = coastalPop["TIME_PERIOD"].astype(int)
coastalPop = (
    coastalPop.groupby(["country", "TIME_PERIOD", "coastal"])
    .sum(numeric_only=True)
    .reset_index()
)
# 0 non-coastal, 1 coastal (on coast),
# 2 coastal (>= 50% of population living within 50km of the coastline)
coastalPop = (
    coastalPop[coastalPop["coastal"].isin([1, 2])]
    .groupby(["country", "TIME_PERIOD"])
    .sum()
)

fteCoastalPop = coastalPop.merge(
    fteEmployment,
    left_on=["country", "TIME_PERIOD"],
    right_on=["country_name", "year"],
    how="inner",
)
fteCoastalPop["fteCoastalPop"] = (
    fteCoastalPop["fteEmployees"] / fteCoastalPop["OBS_VALUE"]
)

# We set target using top 3 countries. The more FTE employees/Coastal population the better
target = max(calculate_target(fteCoastalPop, "fteCoastalPop", "country_name"))
fteCoastalPop["target"] = (
    (fteCoastalPop.fteCoastalPop / target * 100)
    .replace([np.inf, -np.inf], 100)
    .clip(0, 100)
)
years = [2012, 2016, fteCoastalPop.year.max()]
fteCoastalPop = fteCoastalPop.pivot_table(
    index="country_name", columns="year", values="target"
)
fteCoastalPop[years][fteCoastalPop.index.isin(countries)]
missingCountries(fteCoastalPop)
year
2012
2016
2020
country_name
Belgium
8.301720
7.469303
7.612006
Denmark
15.210299
16.648394
16.145873
Estonia
73.518596
62.530963
41.382950
Finland
17.422097
14.952275
11.662557
France
15.224464
14.087975
12.409121
Germany
53.966767
60.393138
61.643754
Ireland
5.668473
8.099165
8.990206
Latvia
23.222019
26.817885
22.008593
Lithuania
73.725091
79.037007
91.062719
Netherlands
20.941432
12.987307
15.318658
Poland
36.379926
34.737284
35.336342
Portugal
15.794767
18.095508
17.850648
Spain
23.014225
24.831960
18.070642
Sweden
10.492028
11.198808
10.685918
United Kingdom
10.121654
9.924725
NaN
No missing countries
2. Tourism: GVA/nights spent
Code
# Tourism: GVA/nights spent
# Nights spent at tourist accommodation establishments in coastal areas
# https://ec.europa.eu/eurostat/databrowser/view/TOUR_OCC_NIN2DC__custom_7065857/default/table?lang=en
nightCoast = pd.read_csv("../data/nightsAccomoCoastal.csv")
nightCoast.geo = nightCoast.geo.replace(abbrev_to_country)
nightCoast = nightCoast[nightCoast.geo.isin(countriesEU)]

gvaAccomodation = gva[gva["Sub-sector"] == "Accommodation"]
gvaNights = gvaAccomodation.reset_index().merge(
    nightCoast,
    left_on=["country", "Year"],
    right_on=["geo", "TIME_PERIOD"],
    how="inner",
)
# gva is in millions; express it per night spent
gvaNights["gvaNights"] = gvaNights["gva"] * 1_000_000 / gvaNights["OBS_VALUE"]

# We set target using top 3 countries. The more GVA/nights spent the better
target = max(calculate_target(gvaNights, "gvaNights", "country"))
gvaNights["target"] = (
    (gvaNights.gvaNights / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, gvaNights.Year.max()]
gvaNights = gvaNights.pivot_table(index="country", columns="Year", values="target")
gvaNights[years][gvaNights.index.isin(countries)]
missingCountries(gvaNights)
Year
2012
2016
2020
country
Belgium
45.334240
38.058850
41.541088
Denmark
81.889205
93.267237
100.000000
Estonia
58.620529
58.491705
75.930535
Finland
65.419935
79.994586
55.925323
France
47.093545
47.012451
42.348381
Germany
70.673069
71.550445
85.368497
Ireland
NaN
85.079465
100.000000
Latvia
38.809039
53.736533
47.795775
Lithuania
33.817135
52.212723
48.222261
Netherlands
39.665778
40.625908
31.666511
Poland
60.057201
41.072395
27.725916
Portugal
53.680933
57.461233
67.520145
Spain
53.868019
56.316363
44.842960
Sweden
62.006181
75.725888
74.427793
United Kingdom
59.320101
39.530034
NaN
No missing countries
14.a
Proportion of total research budget allocated to research in the field of marine technology
We use two indicators:
Official UNSD indicator ER_RDE_OSEX. Target is top 3 countries
SAD/TAC: Catch-weighted TAC relative to Scientific Advice
1. Ocean science expenditure ER_RDE_OSEX
Several sources
Note: ER_RDE_OSEX data comes from Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 is available in the UNstats archive (download csv for 29-Mar-19)
Code
# %%time
# National ocean science expenditure as a share of total research and development
# funding (%), UNstats archive (years 2009-2012)
# https://unstats.un.org/sdgs/indicators/database/archive
# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)
oResearchOld = pd.read_csv("../data/archive14a1.csv")
oResearchOld = oResearchOld.pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total research and development (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database
# read official data and merge with archive data
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe
oResearch = oResearch[oResearch.index.isin(countriesEU)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report (merge suffixes: _x archive, _y new)
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
oResearch = oResearch[list(range(2013, 2018))]

# weighted by EEZ area
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )
oResearch = (
    oResearch.drop("Area_km2", axis=1)
    .stack()
    .reset_index()
    .rename(columns={0: "oResearch", "level_1": "Year", "level_0": "country"})
)

# We set target with top 3 countries.
# The more ocean science expenditure/total R&D funding, the better.
target = max(calculate_target(oResearch, "oResearch", "country"))
oResearch["target"] = (
    (oResearch.oResearch / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2013, 2016, oResearch.Year.max()]
oResearch = oResearch.pivot_table(index="country", columns="Year", values="target")
oResearch[years][oResearch.index.isin(countries)]
missingCountries(oResearch)
Year
2013
2016
2017
country
Belgium
0.310608
0.233153
0.221183
Finland
6.119042
6.207167
6.142723
France
65.177100
NaN
43.513303
Germany
4.346289
3.390835
3.315717
Ireland
NaN
NaN
100.000000
Netherlands
3.818245
3.248525
3.598533
Poland
1.229127
0.853481
0.823581
Portugal
NaN
100.000000
NaN
Spain
68.138571
100.000000
100.000000
United Kingdom
100.000000
94.759160
100.000000
Missing countries:
Denmark
Estonia
Lithuania
Latvia
Sweden
# Percentage of Fish Species Threatened
# Time series comparison is tricky: https://www.iucnredlist.org/assessment/red-list-index
threatenedFish = pd.read_csv("../dataTemp/threatenedFishIUCN.csv")
threatenedFish = threatenedFish.pivot_table(
    index="name", columns="year", values="threatenedScore"
)
threatenedFish
missingCountries(threatenedFish)
year
2012
2016
2022
name
Belgium
77.777778
87.786260
84.967320
Denmark
83.516484
89.944134
86.138614
Estonia
80.952381
89.473684
90.243902
Finland
81.818182
89.189189
90.000000
France
88.135593
93.216080
91.332611
Germany
77.611940
87.022901
82.894737
Ireland
86.592179
93.750000
91.885965
Latvia
77.272727
87.804878
88.636364
Lithuania
77.272727
87.500000
88.372093
Netherlands
83.132530
91.489362
89.047619
Poland
76.923077
87.234043
86.274510
Portugal
89.573460
94.731065
92.578125
Spain
89.938398
94.646465
92.513863
Sweden
77.272727
87.121212
84.027778
United Kingdom
86.729858
93.827160
91.337100
No missing countries
14.c
Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea
We use two indicators:
Participation in agreements of the International Maritime Organization (IMO participation rate). No further transformation.
1. Participation in agreements of the International Maritime Organization
# Country-code dictionary used by the GISIS database.
with open("../data/gisisDict.csv") as fh:
    gisisDict = dict(csv.reader(fh))
# invert the mapping so country names key the GISIS codes
gisisDict = dict(map(reversed, gisisDict.items()))
Code
# Participation in agreements of the International Marine Organization
# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# We get the historical data from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# You need to create account to access data.
# I tried to scrape the data but I am getting errors with Selenium and bs4.
# I downloaded the html manually
gisisCountries = {name: code for name, code in gisisDict.items() if name in countries}
# one ratification-status URL per assessed country
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(code)
    for code in gisisCountries.values()
]
# for i in listIMO:
#     webbrowser.open(i)
Code
# Participation in IMO agreements, from GISIS ratification pages saved as
# local HTML files (one per country, downloaded manually — see previous cell).
# For each country, count the treaties in force and not denounced by 2012,
# 2016 and 2021, then express the count as a percentage of the total number
# of treaties (dif-ref transformation), capped at 100.
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2016, 2021])
# loop thru the html files in the folder and extract the data
for fileIdx in range(len(os.listdir("../data/treatiesIMO/"))):
    # FIX: the inner year loop used to reuse `i`, shadowing this file-index
    # loop variable; the loops now use distinct names (fileIdx / yr).
    countryIMO = None  # None sentinel instead of identity-comparing np.nan
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            fileIdx
        )
    )[4]
    # get the country name from the html by checking if an assessed country's
    # name appears in the first "Treaty" cell
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    if countryIMO is not None:
        # second row holds the real header; data starts on the third row
        imoRate.columns = imoRate.iloc[1]
        imoRate = imoRate[2:]
        # new columns with the year of accession and denouncement, parsed
        # from the "Date of entry into force in country" cell
        imoRate["accession"] = (
            imoRate["Date of entry into force in country"]
            .str.extract("^([^(]+)")
            .fillna("")
        )
        imoRate["denounced"] = imoRate[
            "Date of entry into force in country"
        ].str.extract(".*\\:(.*)\\).*")
        imoRate[["accession", "denounced"]] = (
            imoRate[["accession", "denounced"]]
            .apply(pd.DatetimeIndex)
            .apply(lambda x: x.dt.year)
        )
        # count the treaties each country accessioned and didn't denounce by
        # each reference year.
        # NOTE(review): `accession < yr` is strict, so treaties entering into
        # force during the reference year itself are not counted — confirm.
        for yr in (2012, 2016, 2021):
            imoRate[str(yr)] = np.where(
                (imoRate.accession < yr)
                & ((imoRate.denounced > yr) | (imoRate.denounced.isna())),
                1,
                0,
            )
        imoCount = (
            countryIMO,
            imoRate["2012"].sum(),
            imoRate["2016"].sum(),
            imoRate["2021"].sum(),
        )
        imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount
# total possible treaties (dif-ref denominator), taken from the last parsed
# table — assumes every country page lists the same treaty set (TODO confirm)
targetIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))
imoRatedf = imoRatedf.set_index("Country").sort_index()
imoRatedf = 100 * imoRatedf / targetIMO
imoRatedf[imoRatedf > 100] = 100
# astype(float64) already yields a fully numeric frame; the redundant
# apply(pd.to_numeric) pass that followed was removed
imoRatedf = imoRatedf.astype(np.float64)
imoRatedf
missingCountries(imoRatedf)
2012
2016
2021
Country
Belgium
78.431373
76.470588
90.196078
Denmark
80.392157
86.274510
92.156863
Estonia
76.470588
78.431373
82.352941
Finland
76.470588
78.431373
90.196078
France
84.313725
84.313725
96.078431
Germany
80.392157
82.352941
88.235294
Ireland
66.666667
68.627451
68.627451
Latvia
80.392157
80.392157
82.352941
Lithuania
58.823529
62.745098
64.705882
Netherlands
84.313725
86.274510
92.156863
Poland
80.392157
84.313725
86.274510
Portugal
68.627451
76.470588
86.274510
Spain
86.274510
86.274510
88.235294
Sweden
82.352941
90.196078
94.117647
United Kingdom
78.431373
78.431373
78.431373
No missing countries
Indicators aggregation
Given our ratio-scale, fully comparable indicators, a meaningful aggregation of indicators into a composite indicator is obtained according to social choice theory by applying a generalized mean: $CI = \left( \sum_{i=1}^{n} w_i \, x_i^{\sigma} \right)^{1/\sigma}$
with weights $w_i \ge 0$ and $\sum_{i=1}^{n} w_i = 1$. The parameter $\sigma$ is used to quantify the elasticity of substitution between the different indicators. High (low) values of $\sigma$ imply good (poor) substitution possibilities, which means that a high score in one indicator can (cannot) compensate a low score in another indicator. Consequently, high and low values of $\sigma$ correspond to concepts of weak and strong sustainability, respectively. Depending on $\sigma$, one can obtain a full class of specific functional forms for the composite indicator.
We define: $\sigma = 10$ for weak sustainability
and $\sigma = 0.5$ for strong sustainability (the values used in the code below).
Code
# Collect every indicator DataFrame built in the earlier cells (each is a
# Country x Year table of 0-100 scores) and merge them into one long frame.
varDf = [
    nitro, phospho, eutro, wasteG, wasteR, mspVal, ohiBio, co2, phCopernicus,
    fmsyF, bBmsy, kbaMPA, natura2kEEZ, fseRiskkRatio, tacCatch, compositeIUU,
    fteCoastalPop, gvaHoursW, economies, gvaNights, oResearch, sadTac, ohiArt,
    threatenedFish, imoRatedf,
]
varNames = [
    "nitro", "phospho", "eutro", "wasteG", "wasteR", "mspVal", "ohiBio", "co2",
    "phCopernicus", "fmsyF", "bBmsy", "kbaMPA", "natura2kEEZ", "fseRiskkRatio",
    "tacCatch", "compositeIUU", "fteCoastalPop", "gvaHoursW", "economies",
    "gvaNights", "oResearch", "sadTac", "ohiArt", "threatenedFish", "imoRatedf",
]
dictIndicators = dict(zip(varNames, varDf))
# stack variables in each dataframe: wide (years as columns) -> long with a
# (Country, Year) MultiIndex and a single named value column
for name, df in dictIndicators.items():
    df = df.stack().to_frame().rename(columns={0: str(name)})
    df.index.names = ["Country", "Year"]
    df.reset_index(inplace=True)
    df.Year = df.Year.astype(int)
    df.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = df
# merge all variables into one dataframe (outer join keeps every year that
# appears in any indicator)
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
years = list(range(2012, indicators.index.get_level_values("Year").max() + 1))
indicators = indicators.reset_index().sort_values(["Country", "Year"])
# restrict to the assessment window and the assessed countries
indicators = indicators[
    (indicators.Year.isin(years)) & (indicators.Country.isin(countries))
]
indicators.to_csv("../output/data/indicatorsOriginal.csv", index=False)
indicatorsOriginal = indicators.copy()
# We fill missing values using nearest, then forward and back fill by country
# We care about 2012, 2016, and most recent. Nearest only affects fseRiskRatio
# Solution from https://stackoverflow.com/a/70363774/14534411
# NOTE(review): `group` here still contains the string Country column (the
# groupby key), so np.isnan over the whole group looks fragile for non-numeric
# dtypes — confirm this ran without error on the pandas version in use.
indicators = indicators.groupby(["Country"], group_keys=True).apply(
    lambda group: (
        group.interpolate(
            method="nearest",
            axis=0,
        )
        # nearest-interpolate only when the group has at least two non-NaN
        # rows; otherwise interpolation cannot work, keep the group as-is
        if np.count_nonzero(np.isnan(group)) < (len(group) - 1)
        else group
    )
)
# linear interpolation with limit_direction="both" also forward/back fills
# the edges of each country's series
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda group: group.interpolate(method="linear", axis=0, limit_direction="both")
)
indicators.index = indicators.index.droplevel(1)
# We then use mean of other countries to fill countries missing all values. This affects:
# Finland (fseRIskRatio), France (gvaHoursW), Denmark (gvaHoursW, oResearch),
# Estonia, Latvia, Lithuania, Sweden (oResearch)
indicators = indicators.groupby("Year", group_keys=False).apply(
    lambda x: x.fillna(x.mean())
)
indicators = indicators.set_index("Year", append=True)
# 14.1 and 14.7 have subindicators that need to be combined into a composite indicator
compoIndicaDict = {
    "plastic": ["wasteG", "wasteR"],
    "nutrient": ["nitro", "phospho"],
    "livelihoods": ["fteCoastalPop", "gvaHoursW"],
}
# calculate composite indicators using weak sustainability (sigma = 10,
# equal weights). Function in compositeIndicators.py (imported as ci).
for target, indicator in compoIndicaDict.items():
    try:
        alpha = 1 / len(compoIndicaDict[target])
        df = indicators[indicator]
        sigma = 10
        indicators[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        print("Missing indicator for", target)
indicators.to_csv("../output/data/indicatorsImputed.csv", index=True)
Target level aggregation
Code
# Map each SDG 14 target to the indicator columns that feed it.
targetsDict = {
    "14.1 Pollution": ["nutrient", "eutro", "plastic"],
    "14.2 Ecosystem": ["mspVal", "ohiBio"],
    "14.3 Acidification": ["co2", "phCopernicus"],
    "14.4 Sustainable Fishing": ["fmsyF", "bBmsy"],
    "14.5 Protection": ["kbaMPA", "natura2kEEZ"],
    "14.6 Incentives": ["fseRiskkRatio", "tacCatch", "compositeIUU"],
    "14.7 Economics": ["livelihoods", "economies", "gvaNights"],
    "14.a Science": ["oResearch", "sadTac"],
    "14.b Small-scale Fishing": ["ohiArt", "threatenedFish"],
    "14.c Marine Agreements": ["imoRatedf"],
}
# Aggregate indicators into target scores under weak sustainability.
targets = indicators.copy()
for tgt, cols in targetsDict.items():
    try:
        weight = 1 / len(cols)  # equal weights across the target's indicators
        subset = targets[cols]
        elasticity = 10  # use 0.5 instead for strong sustainability
        targets[tgt] = ci.compositeDF(weight, subset, elasticity)
    except KeyError:
        print("Missing indicator for", tgt)
# keep only the aggregated target columns — their names all contain "14"
targets = targets[[c for c in targets if "14" in c]]
targets.to_csv("../output/data/targetsComposite.csv", index=True)
%%time
# calculate composite score for each target importing the function from composite.py
# monte carlo for strong sustainability and one sigma for weak sustainability
mostRecent = targets.index.get_level_values("Year").max()
# strong sustainability: Monte Carlo (10,000 simulations) over 2012, 2016 and
# the most recent year, then restrict to the assessed countries
scoresStrong = ci.compositeMC(df = targets, years=[2012, 2016, mostRecent], simulations=10_000)
scoresStrong = scoresStrong[scoresStrong.index.get_level_values('Country').isin(countries)]
# weak sustainability: equally-weighted arithmetic mean of the target scores
scoresWeak = pd.DataFrame((1/len(targets.columns) * targets).sum(axis=1), columns=['scoreWeak'])
CPU times: user 6.28 s, sys: 8.75 ms, total: 6.29 s
Wall time: 6.29 s
Denmark gvaHoursW is missing all values
Denmark oResearch is missing all values
Estonia oResearch is missing all values
Finland fseRiskkRatio is missing all values
France gvaHoursW is missing all values
Latvia oResearch is missing all values
Lithuania oResearch is missing all values
Sweden oResearch is missing all values
United Kingdom eutro is missing all values
# Goal-level progress per country, aggregated from indicator trend slopes.
# progressTrend and targetIndicatorsPair come from earlier cells (not visible
# here); progressTrend presumably holds a per-indicator "Slope" — confirm.
progressTarget = progressTrend.merge(
    targetIndicatorsPair, left_on="Indicator", right_on="indicator", how="inner"
)
# keep only SDG 14 targets
progressTarget = progressTarget[progressTarget.target.str.startswith("14")]
# the progress for the goal score is the arithmetic mean of the progress of the targets (weak sustainability)
# first average indicators within each (target, Country) pair, then average
# the targets within each Country
progressTarget = (
    progressTarget.groupby(["target", "Country"]).mean(numeric_only=True).reset_index()
)
progressTarget = progressTarget.groupby("Country").mean(numeric_only=True).reset_index()
# display countries ranked by goal-level progress slope (descending)
progressTarget[["Country", "Slope"]].sort_values("Slope", ascending=False)
Country
Slope
2
Estonia
1.448798
8
Lithuania
1.047594
11
Portugal
0.965072
13
Sweden
0.821055
12
Spain
0.737858
10
Poland
0.732307
7
Latvia
0.729720
3
Finland
0.692078
4
France
0.662620
1
Denmark
0.605205
6
Ireland
0.268582
5
Germany
0.099177
14
United Kingdom
0.019422
0
Belgium
-0.170961
9
Netherlands
-0.306052
Plots
Code
# Get EEZ-weigthed average for the plotseezAverage = pd.DataFrame(plotsData.reset_index()).merge( eez, left_on="Country", right_on="Country", how="left")eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2eezAverage = ( eezAverage.groupby(["Year", "indicator"]) .sum(numeric_only=True) .drop("value", axis=1))eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
EEZ-weighted average change between 2012 and most recent year
# define function to plot boxplots
def plotBox(
    df,
    xlegend,
    ylegend,
    maxShift,
    minShift,
    xaxis_title=str,  # NOTE(review): default is the `str` type object, not a
    yaxis_title=str,  # string; callers always pass these, so it is never used
    colorGroup=str,  # NOTE(review): accepted but never used in the body
):
    """Draw an overlay boxplot of `value` per `indicator`.

    Annotates the countries holding the max and min of each indicator (an
    asterisk when more than 4 countries tie), overlays the EEZ-weighted
    average (from the module-level `eezAverage`, most recent year) as black
    markers, writes the figure to ../output/figs/boxPlot<xaxis_title>.png/.pdf
    and shows it.

    Parameters: df — long-format frame with columns indicator/value/Country;
    xlegend, ylegend — legend position; maxShift, minShift — vertical pixel
    offsets for the max/min annotations; xaxis_title, yaxis_title — axis
    labels (xaxis_title also names the output files).
    """
    # start plots
    fig = px.box(
        df,
        x="indicator",
        y="value",
        boxmode="overlay",
        # color all grey
        color_discrete_sequence=["grey"],
    )
    # add Countries annotation in the max and min
    # create annotation list, x is the x-axis position of the annotation
    annoList = []
    maxCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        # get max value for indicator
        maxVal = np.max(df[df["indicator"] == s]["value"])
        # get countries with max value, if more than 4 countries, use *
        # NOTE(review): this local `countries` shadows the module-level
        # `countries` list for the rest of the function body
        countries = df.loc[
            (df["value"] == maxVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            maxCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annotation = dict(
            x=x,
            # y position is the max value
            y=maxVal,
            text=countries,
            yshift=maxShift,
            showarrow=False,
        )
        annoList.append(annotation)
        x += 1
    minCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        # get min value for indicator
        minVal = np.min(df[df["indicator"] == s]["value"])
        # get countries with min value, if more than 4 countries, use * and store in dictionary
        countries = df.loc[
            (df["value"] == minVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            minCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annotation = dict(
            x=x,
            # y position is the min value
            y=minVal,
            text=countries,
            yshift=minShift,
            showarrow=False,
        )
        annoList.append(annotation)
        x += 1
    # add EEZ-weigthed average values (most recent year in eezAverage)
    mostRecent = eezAverage.index.get_level_values("Year").max()
    indicatorAverage = eezAverage.loc[
        (mostRecent, df.indicator.unique()), :
    ].reset_index()
    for s in indicatorAverage.indicator.unique():
        # get EEZ-weigthed average value for indicator
        averageVal = indicatorAverage[indicatorAverage["indicator"] == s][
            "averageEEZ"
        ].values[0]
        fig.add_scatter(
            x=[s],
            # y position is the average value
            y=[averageVal],
            type="scatter",
            mode="markers",
            legendgroup="EEZ-weighted average",
            name="EEZ-weighted average",
            marker=dict(color="black", size=6),
        )
    fig.update_layout(annotations=annoList)
    # just to add the legend with one entry (reuses the last s/averageVal
    # from the loop above)
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        type="scatter",
        mode="markers",
        legendgroup="EEZ-weighted average",
        name="EEZ-weighted average",
        marker=dict(color="black", size=6),
    )
    # change legend position, axis titles
    fig.update_layout(
        height=800,
        width=1000,
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=25, color="black"),
            bgcolor="White",
            bordercolor="black",
            borderwidth=1,
        ),
        font=dict(family="sans-serif", size=25, color="black"),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        plot_bgcolor="white",
        # yaxis_range=[-20,100]
        margin=dict(l=0, r=0, b=0, t=30),
        paper_bgcolor="white",
    )
    fig.update_yaxes(title_font=dict(size=25))
    fig.update_xaxes(title_font=dict(size=25))
    fig.update_xaxes(tickangle=90)
    fig.update_layout(template="simple_white")
    # export both raster and vector versions, then render inline
    fig.write_image("../output/figs/boxPlot{0}.png".format(xaxis_title), scale=2)
    fig.write_image("../output/figs/boxPlot{0}.pdf".format(xaxis_title))
    fig.show()
Code
# plot min, max, and EEZ-weigthed average for targets# only for most recent yearboxPlotData = plotsData[ plotsData.index.get_level_values("Year").isin([mostRecent])].reset_index()boxPlotData.Country = boxPlotData.Country.map(country_to_abbrev)boxPlotData.sort_values(by=["target"], inplace=True)indicatorsBox = boxPlotData[ boxPlotData.indicator.isin(list(indicatorNames.values()))].copy()# plotBox(# df=indicatorsBox,# xaxis_title="Indicators",# yaxis_title="Score",# xlegend=0.85,# ylegend=0.2,# maxShift=30,# minShift=-20,# colorGroup="target",# )targetBox = boxPlotData[ boxPlotData.indicator.isin(list(targetsDict.keys()))& boxPlotData.indicator.str.startswith("14")]plotBox( df=targetBox, xaxis_title="Targets", yaxis_title="Score", xlegend=0.36, ylegend=1.15, maxShift=30, minShift=-10, colorGroup="indicator",)
Code
# Break each target label after its number ("14.x") so the subplot titles
# fit a bigger font.
indicatorsBox["target"] = (
    indicatorsBox["target"].str.slice(stop=4)
    + "<br>"
    + indicatorsBox["target"].str.slice(start=4)
)
Code
# Boxplots with subplots for each target: one facet per target, each showing
# its indicators' score distributions with EEZ-weighted average markers and
# max/min country annotations.
boxesLen = len(indicatorsBox.target.unique().tolist())
boxes = []
# Initialize the subplot figure: 2 rows, half the targets per row
fig = make_subplots(
    rows=2,
    cols=int(boxesLen / 2),
    subplot_titles=indicatorsBox.target.unique().tolist(),
    shared_yaxes=True,
    horizontal_spacing=0.005,
    vertical_spacing=0.30,
    # y_title="Percentage",
)
# maps the 1-based target position to its subplot row
rowDict = {1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 2, 10: 2}
# otherwise the large font and the boxplots conflict
# Create a boxplot facet for each target
for i, target in enumerate(indicatorsBox.target.unique().tolist()):
    traceTemp = go.Box(
        y=indicatorsBox[indicatorsBox.target == target].value,
        x=indicatorsBox[indicatorsBox.target == target].indicator,
        name=target,
        marker=dict(color="grey"),
        showlegend=False,
    )
    boxes.append(traceTemp)
    # filter df to get EEZ-weighted average for each indicator within the target
    mostRecent = int(eezAverage.index.get_level_values("Year").max())
    indicatorAverage = eezAverage.loc[
        (mostRecent, indicatorsBox[indicatorsBox.target == target].indicator.unique()),
        :,
    ].reset_index()
    indicatorAverage.sort_values(by=["indicator"], inplace=True)
    for s in indicatorAverage.indicator.unique():
        # get EEZ-weigthed average value for each indicator
        averageVal = indicatorAverage[indicatorAverage["indicator"] == s][
            "averageEEZ"
        ].values[0]
        tempScatter = go.Scatter(
            x=[s],
            y=[averageVal],
            type="scatter",
            mode="markers",
            legendgroup="EEZ-weighted average",
            name="EEZ-weighted average",
            marker=dict(color="black", size=6),
            showlegend=False,
        )
        # col index cycles 1..5 within each row
        fig.add_trace(tempScatter, row=rowDict[i + 1], col=i % 5 + 1)
        # max and min values for the indicator within the target
        maxVal = np.max(
            indicatorsBox[
                (indicatorsBox.target == target) & (indicatorsBox.indicator == s)
            ]["value"]
        )
        minVal = np.min(
            indicatorsBox[
                (indicatorsBox.target == target) & (indicatorsBox.indicator == s)
            ]["value"]
        )
        # get countries with max and min values and create annotation
        max_countries = indicatorsBox.loc[
            (indicatorsBox["value"] == maxVal)
            & (indicatorsBox["indicator"] == s)
            & (indicatorsBox["target"] == target),
            "Country",
        ].values
        min_countries = indicatorsBox.loc[
            (indicatorsBox["value"] == minVal)
            & (indicatorsBox["indicator"] == s)
            & (indicatorsBox["target"] == target),
            "Country",
        ].values
        # more than 3 tied countries collapse to an asterisk
        if len(max_countries) > 3:
            max_countries = "*"
        else:
            max_countries = ", ".join(max_countries)
        if len(min_countries) > 3:
            min_countries = "*"
        else:
            min_countries = ", ".join(min_countries)
        max_annotation = dict(
            x=s,
            y=maxVal,
            text=max_countries,
            yshift=15,
            showarrow=False,
        )
        min_annotation = dict(
            x=s,
            y=minVal,
            text=min_countries,
            yshift=-15,
            showarrow=False,
        )
        fig.add_annotation(max_annotation, row=rowDict[i + 1], col=i % 5 + 1)
        fig.add_annotation(min_annotation, row=rowDict[i + 1], col=i % 5 + 1)
    fig.add_trace(traceTemp, row=rowDict[i + 1], col=i % 5 + 1)
# dummy trace for the legend (single "EEZ-weighted average" entry)
legend_entry = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    marker=dict(color="black", size=6),
    name="EEZ-weighted average",
)
fig.add_trace(legend_entry)
fig.update_layout(template="simple_white")
fig.update_layout(
    showlegend=True,
    height=1200,
    width=1200,
    title_x=0.5,
    legend=dict(
        x=0.4,
        y=1.15,
        font=dict(family="sans-serif", size=25, color="black"),
        bgcolor="White",
        bordercolor="black",
        borderwidth=1,
    ),
    # margin=dict(l=0, r=0, b=0, t=30),
    paper_bgcolor="white",
    font=dict(family="sans-serif", size=25, color="black"),
)
# y-axis title only on the first column of each row (axes are shared)
fig.update_yaxes(title_text="Score", row=1, col=1)
fig.update_yaxes(title_text="Score", row=2, col=1)
fig.update_xaxes(tickangle=45)
fig.update_annotations(font_size=22)
fig.write_image("../output/figs/boxPlotIndicators.png", scale=2)
fig.write_image("../output/figs/boxPlotIndicators.pdf")
fig.show()
# EEZ-weighted average of the weak-sustainability score.
# FIX: use the most recent year present in the data instead of the
# hard-coded 2022, so the cell keeps working when the dataset is updated.
scoreAvgEEZ = eezAverage.reset_index()
# NOTE(review): assumes scoreWeak is present for the overall latest year
latestYear = scoreAvgEEZ.Year.max()
scoreAvgEEZ = scoreAvgEEZ[
    (scoreAvgEEZ.indicator == "scoreWeak") & (scoreAvgEEZ.Year == latestYear)
]["averageEEZ"].values[0]
# EEZ-weighted average of the goal-level progress slope
eezAvgProg = pd.DataFrame(progressTarget.reset_index()).merge(
    eez, on="Country", how="left"
)
eezAvgProg = sum(eezAvgProg.Slope * eezAvgProg.Area_km2 / sum(eezAvgProg.Area_km2))
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)# CO2, KG_HAB, Eurostat# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')# co2 = co2[co2['geo'].isin(countryAbb)]# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')# mr = co2.columns[-1] # most recent year# # co2[[2012, 2016, mr]]/1000
2. Carbon emissions per capita
Several sources (see code)
Code
# # CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat# # https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/# co2 = pd.read_csv("../data/env_air_gge.csv")# co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])# co2 = co2[# co2["geo"].isin(countries)# & (co2.airpol == "CO2")# & (co2.src_crf == "TOTX4_MEMONIA")# ]# co2 = co2.pivot_table(# columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"# )# mr = co2.columns[-1] # most recent year# co2pc = co2 / pop * 1000 ## tonnes co2 per capita# co2pc.dropna(how="all", inplace=True)# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best# co2pc = (# (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])# / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())# * 100# ).round(2)# co2pc[[2012, 2016, mr]]# missingCountries(co2pc)
1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)
Several sources (see code)
Code
# # Greenhouse gas emissions under the Effort Sharing Decision (ESD), Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency# # https://www.eea.europa.eu/data-and-maps/data/esd-4# ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)# ghgESD = ghgESD[ghgESD["Geographic entity"].isin(countries)]# ghgESD = ghgESD.set_index("Geographic entity")# ghgESD = ghgESD.dropna(axis=1, how="all")# negativeValues(ghgESD)# mr = ghgESD.columns[-1] # most recent year# ghgESD = ghgESD * 1_000_000
Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)
Code
# # Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)# scoreESD2020 = 100 - 100 * (# ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)# ).divide(allocationESD[2020], axis=0)# scoreESD2030 = 100 - 100 * (# ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)# ).divide(allocationESD[2030], axis=0)# scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)# scoreESD1[scoreESD1 > 100] = 100# # scoreESD1[[2012, 2018, 2021]]
Code
# # Effort sharing regulation# # Get the targets for 2020 and 2030 in percentage# # Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels# # There targets for 2020 and for 2030# # https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas# # Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]# limitESR.set_index('Country', inplace=True)# limitESR = limitESR[limitESR.index.isin(countries)]# #UK is not in the dataset, we need to add from the official journal# limitESR# missingCountries(limitESR)
# From 14.6, using Eurostat data for landing values.# Even though the USD-EUR discrepancy does not affect the ratio we calculate,# we get today's exchange rate to convert landing values to USD and have a consistent unit# Solution source: https://stackoverflow.com/a/17250702/14534411# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)# tree = ET.parse(r.raw)# root = tree.getroot()# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}# currency = 'USD'# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)# eurTOusd = float(match.attrib['rate'])# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/# landing = pd.read_csv('../data/fish_ld_main.csv')# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)# landing = landing[landing.Country.isin(countries)]# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')
# # Old MPA data (similar results to Natura2k, even though they use the Protected Planet data)# # Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas# # https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS# mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)# mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")# mpa = mpa.dropna(axis=1, how="all")# mpa = mpa.drop(["Country Code", "Indicator Name", "Indicator Code"], axis=1)# # display all rows# negativeValues(mpa)# mpa = (mpa / 0.3).round(2) # dis-ref with target 30%# mpa[mpa > 100] = 100# mpa.sort_index(inplace=True)# mpa[["2016", "2021"]]# missingCountries(mpa)
Measures under the Marine Strategy Framework Directive (DROPPED)
Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF# But the assessment is much shorter. They refer the reader to a JRC report:# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363# That report assesses all the descriptors, but it cannot be compared to the previous assessment.# Moreover, the source code and data are not available.# Overall, it is hard to make an indicator for the measures taken against pressure indicators by the MS.# Countries report different measures and data is poor.